#####################################
########### Data cleaning ################################



##########################################################################
### (1) Data on Authoritarian Regimes (1900-2015)  
###     from Casey, Adam E.; Lachapelle, Jean; Levitsky, Steven; Way, Lucan A., 2020, "Replication Data for: Social Revolution and Authoritarian Durability", https://doi.org/10.7910/DVN/OMDQI9
##########################################################################
# Read the regime panel from the Lachapelle et al. (2020) replication files.
minority_TSC <- readRDS("main_data.Rdata")


# Pull the V-Dem v12 (Coppedge et al. 2022) columns used below.
# The object `V-Dem-CY-Full+Others-v12` is expected to exist in the session
# already; it is not read from disk in this section.
vdem <- `V-Dem-CY-Full+Others-v12`
vdem2 <- dplyr::select(
  vdem,
  COWcode, year, e_gdppc, e_pop, e_total_oil_income_pc, v2regimpgroup
)

# Attach the V-Dem columns by COW code and year. As a left join, panel rows
# without a V-Dem match are kept and receive NA in the new columns.
minority_TSC <- left_join(
  minority_TSC,
  vdem2,
  by = c("ccode" = "COWcode", "Year" = "year")
)

# Derived covariates, created in a single pass (later entries may use
# earlier ones):
#   foreignsupport     1 when v2regimpgroup equals 13, otherwise 0 (NA stays NA)
#   log_oil_income_pc  log of oil income per capita; 0.001 is added first so
#                      that zero values do not become -Inf
#   log_e_gdppc        log of e_gdppc
#   log_e_pop          log of e_pop
#   gdp                e_gdppc multiplied by e_pop
#   Lag_gdp            gdp of the preceding row within the same Country
#   gdp_growth         percentage change of gdp relative to Lag_gdp
# NOTE(review): Lag_gdp takes the previous *row* per Country, so it assumes
# rows are ordered by Year within each Country -- confirm the input ordering.
minority_TSC <- minority_TSC %>%
  mutate(
    foreignsupport = ifelse(v2regimpgroup == 13, 1, 0),
    log_oil_income_pc = log(e_total_oil_income_pc + 0.001),
    log_e_gdppc = log(e_gdppc),
    log_e_pop = log(e_pop),
    gdp = e_gdppc * e_pop,
    Lag_gdp = ave(gdp, Country, FUN = function(v) c(NA, head(v, -1))),
    gdp_growth = (gdp - Lag_gdp) / Lag_gdp * 100
  )

# Add the foreign sponsorship variable (any_spons) from Casey, Adam E., 2020
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/OWGEAR

data <- read.csv("client_regimes")

# Join the sponsorship indicator onto the regime panel built above. The left
# side must be minority_TSC (not `data`), otherwise every column created so
# far is discarded. The panel names its year column `Year`, while the
# sponsorship file uses `year`.
# NOTE(review): presumably the sponsorship file has one row per ccode-year;
# if it has more, this join will duplicate panel rows -- verify.
minority_TSC <- minority_TSC %>%
  left_join(
    data %>% dplyr::select(ccode, year, any_spons),
    by = c("ccode", "Year" = "year")
  )


# Analysis data set for the regime models: keep only identifiers, survival
# variables, minority measures, economic covariates, regime-type indicators
# and the two foreign-support measures.
# select() is namespace-qualified, as elsewhere in this file, so that the call
# still resolves to dplyr if another attached package also exports select().
minorityreg <- minority_TSC %>%
  dplyr::select(
    Country, Year, ccode, cus_caseid, cus_t_surv, cus_t, cus_fail,
    minority, allminority, frac_minority,
    e_gdppc, log_e_gdppc, gdp, gdp_growth, e_pop, log_e_pop,
    log_oil_income_pc, partB, persB, mil, mon,
    v2regimpgroup, foreignsupport, any_spons
  )


##########################################################################
### (2) Data from NAVCO 2.1
### Chenoweth, Erica; Christopher Wiley Shay, 2019, "NAVCO 2.1 Dataset", https://doi.org/10.7910/DVN/MHOXDV
##########################################################################
# Load the NAVCO 2.1 dataset
navco <- read.csv("NAVCO-TSC-2-1.csv")

# The steps below operate on `navco_TSC13`, which this section never creates.
# If no such object exists in the session, fall back to the file just read.
# NOTE(review): presumably navco_TSC13 is meant to be this NAVCO 2.1 table --
# confirm, and if so replace the fallback with a plain assignment.
if (!exists("navco_TSC13")) {
  navco_TSC13 <- navco
}

### Add variables from V-Dem.v12 (Coppedge et al. 2022)
# Keep the V-Dem controls plus a country-year key (COW code immediately
# followed by the year); the raw key parts are dropped afterwards so they do
# not clash with the NAVCO columns in the join. The original code also tried
# to drop a `cyear` column that was never selected, which makes select() fail.
controlvdem <- `V-Dem-CY-Full+Others-v12` %>%
  dplyr::select(COWcode, year, e_boix_regime, v2caautmob, e_gdppc, e_pop) %>%
  mutate(codyear = paste0(COWcode, year)) %>%
  dplyr::select(-COWcode, -year)

### Build the same country-year key on the NAVCO side
navco_TSC13$codyear <- paste0(navco_TSC13$loc_cow, navco_TSC13$year)

# Merge the two: every NAVCO row is kept, V-Dem controls are NA when there is
# no matching country-year.
navco_TSC13 <- navco_TSC13 %>%
  left_join(controlvdem, by = "codyear")


# Keep campaign observations where e_boix_regime is 0 (the authoritarian
# subset) and drop campaign goal codes 5 (anti-occupation) and -99.
# Rows where either variable is NA are removed by filter().
navco_autocracy2013 <- filter(
  navco_TSC13,
  e_boix_regime == 0,
  camp_goals != 5,
  camp_goals != -99
)

# Campaign ids flagged as minority cases.
minority_campaign_ids <- c(
  "255", "81", "225", "192", "122", "121", "325",
  "388", "200", "110", "165", "246",
  "320", "151", "403", "210", "211", "141", "282",
  "356", "215", "285", "286", "309", "327"
)

# NOTE(review): `yes` is the string "1" while `no` is the number 0, so
# ifelse() coerces the column to character ("1"/"0") whenever at least one id
# matches. Kept as is; confirm whether downstream code expects character or
# numeric before changing it.
navco_autocracy2013 <- mutate(
  navco_autocracy2013,
  minority = ifelse(id %in% minority_campaign_ids, yes = "1", no = 0)
)

# Log-transformed controls and campaign duration, added in one pass.
# start_date / end_date are converted to Date in place; duration_days is the
# difference between them in days as a plain number, and log_duration_days is
# its natural log (a duration of 0 days therefore becomes -Inf).
# NOTE(review): as.Date() is called without a format, so the date columns are
# presumably stored in a format it parses by default -- verify.
navco_autocracy2013 <- navco_autocracy2013 %>%
  mutate(
    log_e_gdppc = log(e_gdppc),
    log_e_pop = log(e_pop),
    start_date = as.Date(start_date),
    end_date = as.Date(end_date),
    duration_days = as.numeric(end_date - start_date),
    log_duration_days = log(duration_days)
  )




##########################################################################
#Data for matching and synthetic control
# from Lachapelle et al. (2020), "Replication Data for: Social Revolution and Authoritarian Durability", https://doi.org/10.7910/DVN/OMDQI9
##########################################################################
# Load the matching / synthetic-control data from (Lachapelle et al. 2020)
dat <- readRDS("synth.Rdata")

# Add variables from V-Dem.v12 (Coppedge et al. 2022).
# v2regimpgroup is included because foreignsupport below is derived from it;
# without it the mutate() fails unless the column is already in `dat`.
# NOTE(review): assumes synth.Rdata does not already carry any of these V-Dem
# columns (a clash would produce .x/.y suffixed names) -- confirm.
vdem2 <- vdem %>%
  dplyr::select(
    COWcode, year, e_gdppc, e_pop, e_civil_war, v2regimpgroup,
    v2regsupgroups_13, v2caautmob_osp, v2exl_legitideolcr_1,
    v2exl_legitideolcr_4, v2x_clphy
  )

# Join by COW code and year, then derive the same covariates as in section 1:
# foreignsupport is 1 when v2regimpgroup equals 13 and 0 otherwise (NA stays
# NA); GDP per capita and population are logged.
dat <- dat %>%
  left_join(vdem2, by = c("ccode" = "COWcode", "Year" = "year")) %>%
  mutate(
    foreignsupport = ifelse(v2regimpgroup == 13, 1, 0),
    log_e_gdppc = log(e_gdppc),
    log_e_pop = log(e_pop)
  )


# Colonial history from Hensel and Mitchell (2017)
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/5EMETG
# `icowcol2` is expected to be loaded in the session already; it is reduced
# here to the state code and the Type column.
icowcol2 <- dplyr::select(icowcol2, State, Type)

# Attach Type by COW code and flag Type == 2 as decolonize = 1 (0 otherwise,
# NA when Type is missing).
# NOTE(review): presumably icowcol2 has one row per State; more than one
# would duplicate rows of `dat` in this join -- verify.
dat <- dat %>%
  left_join(icowcol2, by = c("ccode" = "State")) %>%
  mutate(decolonize = ifelse(Type == 2, 1, 0))



# Subset authoritarian only: keep every row of any country that has at least
# one observation with cus_AR == 1.
# which() drops NA comparisons; indexing with the raw logical would insert NA
# into the country list whenever cus_AR is missing, and rows with a missing
# Country would then be retained by %in%.
authoritarian_countries <- unique(dat$Country[which(dat$cus_AR == 1)])
dat <- subset(dat, Country %in% authoritarian_countries)

# Keep only the variables used for matching and synthetic control.
# select() is namespace-qualified, as elsewhere in this file, so that the call
# still resolves to dplyr if another attached package also exports select().
dat <- dat %>%
  dplyr::select(
    minority, frac_minority, log_e_pop, log_e_gdppc,
    prev_demo, prev_partB, prev_persB, prev_mil, prev_mon,
    e_civil_war, partB, mon, persB, mil,
    decolonize, foreignsupport, v2caautmob_osp,
    Year, ccode, cyear, Country,
    cus_t, cus_caseid, cus_t_surv, cus_fail,
    v2exl_legitideolcr_1, v2exl_legitideolcr_4, v2x_clphy
  )











